library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Location of the customer RFM dataset; point this at your own CSV to analyse
# a different set of customers.
fcsv <- "https://raw.githubusercontent.com/multidis/hult-retail-analytics/main/customer_segmentation/datasets/customers_30.csv"
# Read the CSV into `cust_rfm`; readr infers the column types and reports them.
cust_rfm <- read_csv(file = fcsv)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   CustomerID = col_double(),
##   recency = col_double(),
##   frequency = col_double(),
##   monetary = col_double()
## )
# display the loaded tibble to check its columns and first rows
cust_rfm
## # A tibble: 3,000 x 4
##    CustomerID recency frequency monetary
##         <dbl>   <dbl>     <dbl>    <dbl>
##  1      14953      25         1     286.
##  2      16809      23         5     915.
##  3      18112      12         2     353.
##  4      13587      63         1     712.
##  5      14889     336         1     136.
##  6      14966      14         1     183.
##  7      13029      30         3    1248.
##  8      13650      16         6    1836.
##  9      17078      36         2     378.
## 10      13522      30         1     126.
## # … with 2,990 more rows
# explore customer metrics in the dataset
# Purchase frequency: histogram with bins 5 units wide. Built with
# ggplot() + geom_histogram() because qplot() is deprecated in ggplot2 >= 3.4.0;
# the resulting plot is the same.
ggplot(cust_rfm, aes(x = frequency)) +
  geom_histogram(binwidth = 5)

# 70th and 80th percentiles of purchase frequency
quantile(cust_rfm$frequency, 0.7)
## 70% 
##   4
quantile(cust_rfm$frequency, 0.8)
## 80% 
##   6
# Recency: histogram with bins 10 units wide. Built with
# ggplot() + geom_histogram() because qplot() is deprecated in ggplot2 >= 3.4.0;
# the resulting plot is the same.
ggplot(cust_rfm, aes(x = recency)) +
  geom_histogram(binwidth = 10)

# 30th percentile of recency
quantile(cust_rfm$recency, 0.3)
## 30% 
##  21
# Monetary value over its full range: histogram with bins 1000 units wide.
# Built with ggplot() + geom_histogram() because qplot() is deprecated in
# ggplot2 >= 3.4.0; the resulting plots are the same.
ggplot(cust_rfm, aes(x = monetary)) +
  geom_histogram(binwidth = 1000)

# Same metric restricted to 0-20000 with finer bins (500 wide). xlim() drops
# observations outside the limits instead of merely zooming, which is what the
# warnings recorded below report.
ggplot(cust_rfm, aes(x = monetary)) +
  geom_histogram(binwidth = 500) +
  xlim(0, 20000)
## Warning: Removed 31 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

# 70th percentile of monetary value
quantile(cust_rfm$monetary, 0.7)
##      70% 
## 1315.188
# Interactive scatter plot for spotting the most valuable customers:
# frequency on the x axis, monetary on the y axis, marker colour mapped to
# recency, and the CustomerID shown as the hover text.
cust_rfm %>%
  plot_ly(
    x = ~frequency,
    y = ~monetary,
    color = ~recency,
    text = ~CustomerID,
    hoverinfo = "text"
  ) %>%
  add_markers()
# Example only - adapt the customer subset and the averaged metric as needed.
# Mean recency of customers who exceed the 70th percentile in both frequency
# and monetary value. Both thresholds are evaluated on the full, unfiltered
# table because they sit in the same filter() call.
cust_rfm %>%
  filter(
    frequency > quantile(frequency, 0.7),
    monetary > quantile(monetary, 0.7)
  ) %>%
  summarise(rec = mean(recency))
## # A tibble: 1 x 1
##     rec
##   <dbl>
## 1  25.7